library(titanic)
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.4.0 ✔ purrr 1.0.1
## ✔ tibble 3.1.8 ✔ dplyr 1.0.10
## ✔ tidyr 1.2.1 ✔ stringr 1.5.0
## ✔ readr 2.1.3 ✔ forcats 0.5.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(tidymodels)
## ── Attaching packages ────────────────────────────────────── tidymodels 1.0.0 ──
## ✔ broom 1.0.2 ✔ rsample 1.1.1
## ✔ dials 1.1.0 ✔ tune 1.0.1
## ✔ infer 1.0.4 ✔ workflows 1.1.2
## ✔ modeldata 1.1.0 ✔ workflowsets 1.0.0
## ✔ parsnip 1.0.3 ✔ yardstick 1.1.0
## ✔ recipes 1.0.4
## ── Conflicts ───────────────────────────────────────── tidymodels_conflicts() ──
## ✖ scales::discard() masks purrr::discard()
## ✖ dplyr::filter() masks stats::filter()
## ✖ recipes::fixed() masks stringr::fixed()
## ✖ dplyr::lag() masks stats::lag()
## ✖ yardstick::spec() masks readr::spec()
## ✖ recipes::step() masks stats::step()
## • Use suppressPackageStartupMessages() to eliminate package startup messages
library(glmnet)
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
##
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
##
## Loaded glmnet 4.1-6
library(tidymodels)
library(e1071)
##
## Attaching package: 'e1071'
##
## The following object is masked from 'package:tune':
##
## tune
##
## The following object is masked from 'package:rsample':
##
## permutations
##
## The following object is masked from 'package:parsnip':
##
## tune
library(ROCR)
library(GGally)
## Registered S3 method overwritten by 'GGally':
## method from
## +.gg ggplot2
library(ggcorrplot)
library(splines)
library(car)
## Loading required package: carData
##
## Attaching package: 'car'
##
## The following object is masked from 'package:dplyr':
##
## recode
##
## The following object is masked from 'package:purrr':
##
## some
library(MASS)
##
## Attaching package: 'MASS'
##
## The following object is masked from 'package:dplyr':
##
## select
library(leaps)
library(lmtest)
## Loading required package: zoo
##
## Attaching package: 'zoo'
##
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(skimr)
library(mice)
##
## Attaching package: 'mice'
##
## The following object is masked from 'package:stats':
##
## filter
##
## The following objects are masked from 'package:base':
##
## cbind, rbind
# Load the training data from train.csv (path is relative to the working
# directory); readr guesses the column types.
productfailure <- read_csv(file = "train.csv")
## Rows: 26570 Columns: 26
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): product_code, attribute_0, attribute_1, failure
## dbl (22): id, loading, attribute_2, attribute_3, measurement_0, measurement_...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Inspect the column types and leading values of the freshly imported data.
str(productfailure)
## spc_tbl_ [26,570 × 26] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ id : num [1:26570] 0 1 2 3 4 5 6 7 8 9 ...
## $ product_code : chr [1:26570] "A" "A" "A" "A" ...
## $ loading : num [1:26570] 80.1 84.9 82.4 101.1 188.1 ...
## $ attribute_0 : chr [1:26570] "material_7" "material_7" "material_7" "material_7" ...
## $ attribute_1 : chr [1:26570] "material_8" "material_8" "material_8" "material_8" ...
## $ attribute_2 : num [1:26570] 9 9 9 9 9 9 9 9 9 9 ...
## $ attribute_3 : num [1:26570] 5 5 5 5 5 5 5 5 5 5 ...
## $ measurement_0 : num [1:26570] 7 14 12 13 9 11 12 4 9 10 ...
## $ measurement_1 : num [1:26570] 8 3 1 2 2 4 2 8 6 4 ...
## $ measurement_2 : num [1:26570] 4 3 5 6 8 0 4 8 5 7 ...
## $ measurement_3 : num [1:26570] 18 18.2 18.1 17.3 19.3 ...
## $ measurement_4 : num [1:26570] 12.5 11.5 11.7 11.2 12.9 ...
## $ measurement_5 : num [1:26570] 15.7 17.7 16.7 18.6 17 ...
## $ measurement_6 : num [1:26570] 19.3 17.9 18.2 18.3 15.7 ...
## $ measurement_7 : num [1:26570] 11.7 12.7 12.7 12.6 11.3 ...
## $ measurement_8 : num [1:26570] 20.2 17.9 18.3 19.1 18.1 ...
## $ measurement_9 : num [1:26570] 10.7 12.4 12.7 12.5 10.3 ...
## $ measurement_10: num [1:26570] 15.9 17.9 15.6 16.3 17.1 ...
## $ measurement_11: num [1:26570] 17.6 17.9 NA 18.4 19.9 ...
## $ measurement_12: num [1:26570] 15.2 11.8 13.8 10 12.4 ...
## $ measurement_13: num [1:26570] 15 14.7 16.7 15.2 16.2 ...
## $ measurement_14: num [1:26570] NA 15.4 18.6 15.6 12.8 ...
## $ measurement_15: num [1:26570] 13 14.4 14.1 16.2 13.2 ...
## $ measurement_16: num [1:26570] 14.7 15.6 17.9 17.2 16.4 ...
## $ measurement_17: num [1:26570] 764 682 663 826 580 ...
## $ failure : chr [1:26570] "No" "No" "No" "No" ...
## - attr(*, "spec")=
## .. cols(
## .. id = col_double(),
## .. product_code = col_character(),
## .. loading = col_double(),
## .. attribute_0 = col_character(),
## .. attribute_1 = col_character(),
## .. attribute_2 = col_double(),
## .. attribute_3 = col_double(),
## .. measurement_0 = col_double(),
## .. measurement_1 = col_double(),
## .. measurement_2 = col_double(),
## .. measurement_3 = col_double(),
## .. measurement_4 = col_double(),
## .. measurement_5 = col_double(),
## .. measurement_6 = col_double(),
## .. measurement_7 = col_double(),
## .. measurement_8 = col_double(),
## .. measurement_9 = col_double(),
## .. measurement_10 = col_double(),
## .. measurement_11 = col_double(),
## .. measurement_12 = col_double(),
## .. measurement_13 = col_double(),
## .. measurement_14 = col_double(),
## .. measurement_15 = col_double(),
## .. measurement_16 = col_double(),
## .. measurement_17 = col_double(),
## .. failure = col_character()
## .. )
## - attr(*, "problems")=<externalptr>
# Per-column summary statistics (including NA counts) for the raw import.
summary(productfailure)
## id product_code loading attribute_0
## Min. : 0 Length:26570 Min. : 33.16 Length:26570
## 1st Qu.: 6642 Class :character 1st Qu.: 99.99 Class :character
## Median :13284 Mode :character Median :122.39 Mode :character
## Mean :13284 Mean :127.83
## 3rd Qu.:19927 3rd Qu.:149.15
## Max. :26569 Max. :385.86
## NA's :250
## attribute_1 attribute_2 attribute_3 measurement_0
## Length:26570 Min. :5.000 Min. :5.00 Min. : 0.000
## Class :character 1st Qu.:6.000 1st Qu.:6.00 1st Qu.: 4.000
## Mode :character Median :6.000 Median :8.00 Median : 7.000
## Mean :6.754 Mean :7.24 Mean : 7.416
## 3rd Qu.:8.000 3rd Qu.:8.00 3rd Qu.:10.000
## Max. :9.000 Max. :9.00 Max. :29.000
##
## measurement_1 measurement_2 measurement_3 measurement_4
## Min. : 0.000 Min. : 0.000 Min. :13.97 Min. : 8.008
## 1st Qu.: 5.000 1st Qu.: 4.000 1st Qu.:17.12 1st Qu.:11.051
## Median : 8.000 Median : 6.000 Median :17.79 Median :11.733
## Mean : 8.233 Mean : 6.257 Mean :17.79 Mean :11.732
## 3rd Qu.:11.000 3rd Qu.: 8.000 3rd Qu.:18.47 3rd Qu.:12.410
## Max. :29.000 Max. :24.000 Max. :21.50 Max. :16.484
## NA's :381 NA's :538
## measurement_5 measurement_6 measurement_7 measurement_8
## Min. :12.07 Min. :12.71 Min. : 7.968 Min. :15.22
## 1st Qu.:16.44 1st Qu.:16.84 1st Qu.:11.045 1st Qu.:18.34
## Median :17.13 Median :17.52 Median :11.712 Median :19.02
## Mean :17.13 Mean :17.51 Mean :11.717 Mean :19.02
## 3rd Qu.:17.80 3rd Qu.:18.18 3rd Qu.:12.391 3rd Qu.:19.71
## Max. :21.43 Max. :21.54 Max. :15.419 Max. :23.81
## NA's :676 NA's :796 NA's :937 NA's :1048
## measurement_9 measurement_10 measurement_11 measurement_12
## Min. : 7.537 Min. : 9.323 Min. :12.46 Min. : 5.167
## 1st Qu.:10.757 1st Qu.:15.209 1st Qu.:18.17 1st Qu.:10.703
## Median :11.430 Median :16.127 Median :19.21 Median :11.717
## Mean :11.431 Mean :16.118 Mean :19.17 Mean :11.703
## 3rd Qu.:12.102 3rd Qu.:17.025 3rd Qu.:20.21 3rd Qu.:12.709
## Max. :15.412 Max. :22.479 Max. :25.64 Max. :17.663
## NA's :1227 NA's :1300 NA's :1468 NA's :1601
## measurement_13 measurement_14 measurement_15 measurement_16
## Min. :10.89 Min. : 9.14 Min. : 9.104 Min. : 9.701
## 1st Qu.:14.89 1st Qu.:15.06 1st Qu.:13.957 1st Qu.:15.268
## Median :15.63 Median :16.04 Median :14.969 Median :16.436
## Mean :15.65 Mean :16.05 Mean :14.996 Mean :16.461
## 3rd Qu.:16.37 3rd Qu.:17.08 3rd Qu.:16.018 3rd Qu.:17.628
## Max. :22.71 Max. :22.30 Max. :21.626 Max. :24.094
## NA's :1774 NA's :1874 NA's :2009 NA's :2110
## measurement_17 failure
## Min. : 196.8 Length:26570
## 1st Qu.: 619.0 Class :character
## Median : 701.0 Mode :character
## Mean : 701.3
## 3rd Qu.: 784.1
## Max. :1312.8
## NA's :2284
# Print the tibble to preview its first rows.
productfailure
## # A tibble: 26,570 × 26
## id produ…¹ loading attri…² attri…³ attri…⁴ attri…⁵ measu…⁶ measu…⁷ measu…⁸
## <dbl> <chr> <dbl> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 0 A 80.1 materi… materi… 9 5 7 8 4
## 2 1 A 84.9 materi… materi… 9 5 14 3 3
## 3 2 A 82.4 materi… materi… 9 5 12 1 5
## 4 3 A 101. materi… materi… 9 5 13 2 6
## 5 4 A 188. materi… materi… 9 5 9 2 8
## 6 5 A 75.4 materi… materi… 9 5 11 4 0
## 7 6 A 162. materi… materi… 9 5 12 2 4
## 8 7 A 178. materi… materi… 9 5 4 8 8
## 9 8 A 110. materi… materi… 9 5 9 6 5
## 10 9 A 98.7 materi… materi… 9 5 10 4 7
## # … with 26,560 more rows, 16 more variables: measurement_3 <dbl>,
## # measurement_4 <dbl>, measurement_5 <dbl>, measurement_6 <dbl>,
## # measurement_7 <dbl>, measurement_8 <dbl>, measurement_9 <dbl>,
## # measurement_10 <dbl>, measurement_11 <dbl>, measurement_12 <dbl>,
## # measurement_13 <dbl>, measurement_14 <dbl>, measurement_15 <dbl>,
## # measurement_16 <dbl>, measurement_17 <dbl>, failure <chr>, and abbreviated
## # variable names ¹product_code, ²attribute_0, ³attribute_1, ⁴attribute_2, …
# Convert the outcome and the three categorical predictors from character to
# factor in a single pass. as_factor() keeps the levels in order of first
# appearance rather than sorting them alphabetically.
productfailure <- productfailure %>%
  mutate(
    across(
      c(failure, product_code, attribute_0, attribute_1),
      as_factor
    )
  )
# Re-check the summary: the converted columns now report level counts.
summary(productfailure)
## id product_code loading attribute_0
## Min. : 0 A:5100 Min. : 33.16 material_7:21320
## 1st Qu.: 6642 B:5250 1st Qu.: 99.99 material_5: 5250
## Median :13284 C:5765 Median :122.39
## Mean :13284 D:5112 Mean :127.83
## 3rd Qu.:19927 E:5343 3rd Qu.:149.15
## Max. :26569 Max. :385.86
## NA's :250
## attribute_1 attribute_2 attribute_3 measurement_0
## material_8:10865 Min. :5.000 Min. :5.00 Min. : 0.000
## material_5:10362 1st Qu.:6.000 1st Qu.:6.00 1st Qu.: 4.000
## material_6: 5343 Median :6.000 Median :8.00 Median : 7.000
## Mean :6.754 Mean :7.24 Mean : 7.416
## 3rd Qu.:8.000 3rd Qu.:8.00 3rd Qu.:10.000
## Max. :9.000 Max. :9.00 Max. :29.000
##
## measurement_1 measurement_2 measurement_3 measurement_4
## Min. : 0.000 Min. : 0.000 Min. :13.97 Min. : 8.008
## 1st Qu.: 5.000 1st Qu.: 4.000 1st Qu.:17.12 1st Qu.:11.051
## Median : 8.000 Median : 6.000 Median :17.79 Median :11.733
## Mean : 8.233 Mean : 6.257 Mean :17.79 Mean :11.732
## 3rd Qu.:11.000 3rd Qu.: 8.000 3rd Qu.:18.47 3rd Qu.:12.410
## Max. :29.000 Max. :24.000 Max. :21.50 Max. :16.484
## NA's :381 NA's :538
## measurement_5 measurement_6 measurement_7 measurement_8
## Min. :12.07 Min. :12.71 Min. : 7.968 Min. :15.22
## 1st Qu.:16.44 1st Qu.:16.84 1st Qu.:11.045 1st Qu.:18.34
## Median :17.13 Median :17.52 Median :11.712 Median :19.02
## Mean :17.13 Mean :17.51 Mean :11.717 Mean :19.02
## 3rd Qu.:17.80 3rd Qu.:18.18 3rd Qu.:12.391 3rd Qu.:19.71
## Max. :21.43 Max. :21.54 Max. :15.419 Max. :23.81
## NA's :676 NA's :796 NA's :937 NA's :1048
## measurement_9 measurement_10 measurement_11 measurement_12
## Min. : 7.537 Min. : 9.323 Min. :12.46 Min. : 5.167
## 1st Qu.:10.757 1st Qu.:15.209 1st Qu.:18.17 1st Qu.:10.703
## Median :11.430 Median :16.127 Median :19.21 Median :11.717
## Mean :11.431 Mean :16.118 Mean :19.17 Mean :11.703
## 3rd Qu.:12.102 3rd Qu.:17.025 3rd Qu.:20.21 3rd Qu.:12.709
## Max. :15.412 Max. :22.479 Max. :25.64 Max. :17.663
## NA's :1227 NA's :1300 NA's :1468 NA's :1601
## measurement_13 measurement_14 measurement_15 measurement_16
## Min. :10.89 Min. : 9.14 Min. : 9.104 Min. : 9.701
## 1st Qu.:14.89 1st Qu.:15.06 1st Qu.:13.957 1st Qu.:15.268
## Median :15.63 Median :16.04 Median :14.969 Median :16.436
## Mean :15.65 Mean :16.05 Mean :14.996 Mean :16.461
## 3rd Qu.:16.37 3rd Qu.:17.08 3rd Qu.:16.018 3rd Qu.:17.628
## Max. :22.71 Max. :22.30 Max. :21.626 Max. :24.094
## NA's :1774 NA's :1874 NA's :2009 NA's :2110
## measurement_17 failure
## Min. : 196.8 No :20921
## 1st Qu.: 619.0 Yes: 5649
## Median : 701.0
## Mean : 701.3
## 3rd Qu.: 784.1
## Max. :1312.8
## NA's :2284
# Confirm the column types after the factor conversion.
str(productfailure)
## tibble [26,570 × 26] (S3: tbl_df/tbl/data.frame)
## $ id : num [1:26570] 0 1 2 3 4 5 6 7 8 9 ...
## $ product_code : Factor w/ 5 levels "A","B","C","D",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ loading : num [1:26570] 80.1 84.9 82.4 101.1 188.1 ...
## $ attribute_0 : Factor w/ 2 levels "material_7","material_5": 1 1 1 1 1 1 1 1 1 1 ...
## $ attribute_1 : Factor w/ 3 levels "material_8","material_5",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ attribute_2 : num [1:26570] 9 9 9 9 9 9 9 9 9 9 ...
## $ attribute_3 : num [1:26570] 5 5 5 5 5 5 5 5 5 5 ...
## $ measurement_0 : num [1:26570] 7 14 12 13 9 11 12 4 9 10 ...
## $ measurement_1 : num [1:26570] 8 3 1 2 2 4 2 8 6 4 ...
## $ measurement_2 : num [1:26570] 4 3 5 6 8 0 4 8 5 7 ...
## $ measurement_3 : num [1:26570] 18 18.2 18.1 17.3 19.3 ...
## $ measurement_4 : num [1:26570] 12.5 11.5 11.7 11.2 12.9 ...
## $ measurement_5 : num [1:26570] 15.7 17.7 16.7 18.6 17 ...
## $ measurement_6 : num [1:26570] 19.3 17.9 18.2 18.3 15.7 ...
## $ measurement_7 : num [1:26570] 11.7 12.7 12.7 12.6 11.3 ...
## $ measurement_8 : num [1:26570] 20.2 17.9 18.3 19.1 18.1 ...
## $ measurement_9 : num [1:26570] 10.7 12.4 12.7 12.5 10.3 ...
## $ measurement_10: num [1:26570] 15.9 17.9 15.6 16.3 17.1 ...
## $ measurement_11: num [1:26570] 17.6 17.9 NA 18.4 19.9 ...
## $ measurement_12: num [1:26570] 15.2 11.8 13.8 10 12.4 ...
## $ measurement_13: num [1:26570] 15 14.7 16.7 15.2 16.2 ...
## $ measurement_14: num [1:26570] NA 15.4 18.6 15.6 12.8 ...
## $ measurement_15: num [1:26570] 13 14.4 14.1 16.2 13.2 ...
## $ measurement_16: num [1:26570] 14.7 15.6 17.9 17.2 16.4 ...
## $ measurement_17: num [1:26570] 764 682 663 826 580 ...
## $ failure : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 2 2 1 1 ...
# NOTE(review): repeats the summary() call made right after the factor
# conversion; productfailure is not modified in between.
summary(productfailure)
## id product_code loading attribute_0
## Min. : 0 A:5100 Min. : 33.16 material_7:21320
## 1st Qu.: 6642 B:5250 1st Qu.: 99.99 material_5: 5250
## Median :13284 C:5765 Median :122.39
## Mean :13284 D:5112 Mean :127.83
## 3rd Qu.:19927 E:5343 3rd Qu.:149.15
## Max. :26569 Max. :385.86
## NA's :250
## attribute_1 attribute_2 attribute_3 measurement_0
## material_8:10865 Min. :5.000 Min. :5.00 Min. : 0.000
## material_5:10362 1st Qu.:6.000 1st Qu.:6.00 1st Qu.: 4.000
## material_6: 5343 Median :6.000 Median :8.00 Median : 7.000
## Mean :6.754 Mean :7.24 Mean : 7.416
## 3rd Qu.:8.000 3rd Qu.:8.00 3rd Qu.:10.000
## Max. :9.000 Max. :9.00 Max. :29.000
##
## measurement_1 measurement_2 measurement_3 measurement_4
## Min. : 0.000 Min. : 0.000 Min. :13.97 Min. : 8.008
## 1st Qu.: 5.000 1st Qu.: 4.000 1st Qu.:17.12 1st Qu.:11.051
## Median : 8.000 Median : 6.000 Median :17.79 Median :11.733
## Mean : 8.233 Mean : 6.257 Mean :17.79 Mean :11.732
## 3rd Qu.:11.000 3rd Qu.: 8.000 3rd Qu.:18.47 3rd Qu.:12.410
## Max. :29.000 Max. :24.000 Max. :21.50 Max. :16.484
## NA's :381 NA's :538
## measurement_5 measurement_6 measurement_7 measurement_8
## Min. :12.07 Min. :12.71 Min. : 7.968 Min. :15.22
## 1st Qu.:16.44 1st Qu.:16.84 1st Qu.:11.045 1st Qu.:18.34
## Median :17.13 Median :17.52 Median :11.712 Median :19.02
## Mean :17.13 Mean :17.51 Mean :11.717 Mean :19.02
## 3rd Qu.:17.80 3rd Qu.:18.18 3rd Qu.:12.391 3rd Qu.:19.71
## Max. :21.43 Max. :21.54 Max. :15.419 Max. :23.81
## NA's :676 NA's :796 NA's :937 NA's :1048
## measurement_9 measurement_10 measurement_11 measurement_12
## Min. : 7.537 Min. : 9.323 Min. :12.46 Min. : 5.167
## 1st Qu.:10.757 1st Qu.:15.209 1st Qu.:18.17 1st Qu.:10.703
## Median :11.430 Median :16.127 Median :19.21 Median :11.717
## Mean :11.431 Mean :16.118 Mean :19.17 Mean :11.703
## 3rd Qu.:12.102 3rd Qu.:17.025 3rd Qu.:20.21 3rd Qu.:12.709
## Max. :15.412 Max. :22.479 Max. :25.64 Max. :17.663
## NA's :1227 NA's :1300 NA's :1468 NA's :1601
## measurement_13 measurement_14 measurement_15 measurement_16
## Min. :10.89 Min. : 9.14 Min. : 9.104 Min. : 9.701
## 1st Qu.:14.89 1st Qu.:15.06 1st Qu.:13.957 1st Qu.:15.268
## Median :15.63 Median :16.04 Median :14.969 Median :16.436
## Mean :15.65 Mean :16.05 Mean :14.996 Mean :16.461
## 3rd Qu.:16.37 3rd Qu.:17.08 3rd Qu.:16.018 3rd Qu.:17.628
## Max. :22.71 Max. :22.30 Max. :21.626 Max. :24.094
## NA's :1774 NA's :1874 NA's :2009 NA's :2110
## measurement_17 failure
## Min. : 196.8 No :20921
## 1st Qu.: 619.0 Yes: 5649
## Median : 701.0
## Mean : 701.3
## 3rd Qu.: 784.1
## Max. :1312.8
## NA's :2284
# NOTE(review): repeats the str() call made right after the factor
# conversion; productfailure is not modified in between.
str(productfailure)
## tibble [26,570 × 26] (S3: tbl_df/tbl/data.frame)
## $ id : num [1:26570] 0 1 2 3 4 5 6 7 8 9 ...
## $ product_code : Factor w/ 5 levels "A","B","C","D",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ loading : num [1:26570] 80.1 84.9 82.4 101.1 188.1 ...
## $ attribute_0 : Factor w/ 2 levels "material_7","material_5": 1 1 1 1 1 1 1 1 1 1 ...
## $ attribute_1 : Factor w/ 3 levels "material_8","material_5",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ attribute_2 : num [1:26570] 9 9 9 9 9 9 9 9 9 9 ...
## $ attribute_3 : num [1:26570] 5 5 5 5 5 5 5 5 5 5 ...
## $ measurement_0 : num [1:26570] 7 14 12 13 9 11 12 4 9 10 ...
## $ measurement_1 : num [1:26570] 8 3 1 2 2 4 2 8 6 4 ...
## $ measurement_2 : num [1:26570] 4 3 5 6 8 0 4 8 5 7 ...
## $ measurement_3 : num [1:26570] 18 18.2 18.1 17.3 19.3 ...
## $ measurement_4 : num [1:26570] 12.5 11.5 11.7 11.2 12.9 ...
## $ measurement_5 : num [1:26570] 15.7 17.7 16.7 18.6 17 ...
## $ measurement_6 : num [1:26570] 19.3 17.9 18.2 18.3 15.7 ...
## $ measurement_7 : num [1:26570] 11.7 12.7 12.7 12.6 11.3 ...
## $ measurement_8 : num [1:26570] 20.2 17.9 18.3 19.1 18.1 ...
## $ measurement_9 : num [1:26570] 10.7 12.4 12.7 12.5 10.3 ...
## $ measurement_10: num [1:26570] 15.9 17.9 15.6 16.3 17.1 ...
## $ measurement_11: num [1:26570] 17.6 17.9 NA 18.4 19.9 ...
## $ measurement_12: num [1:26570] 15.2 11.8 13.8 10 12.4 ...
## $ measurement_13: num [1:26570] 15 14.7 16.7 15.2 16.2 ...
## $ measurement_14: num [1:26570] NA 15.4 18.6 15.6 12.8 ...
## $ measurement_15: num [1:26570] 13 14.4 14.1 16.2 13.2 ...
## $ measurement_16: num [1:26570] 14.7 15.6 17.9 17.2 16.4 ...
## $ measurement_17: num [1:26570] 764 682 663 826 580 ...
## $ failure : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 2 2 1 1 ...
# skimr overview of every column: missing count, completeness rate, and
# distribution statistics, grouped by column type.
skim(productfailure)
Data summary
| Name |
productfailure |
| Number of rows |
26570 |
| Number of columns |
26 |
| _______________________ |
|
| Column type frequency: |
|
| factor |
4 |
| numeric |
22 |
| ________________________ |
|
| Group variables |
None |
Variable type: factor
| product_code |
0 |
1 |
FALSE |
5 |
C: 5765, E: 5343, B: 5250, D: 5112 |
| attribute_0 |
0 |
1 |
FALSE |
2 |
mat: 21320, mat: 5250 |
| attribute_1 |
0 |
1 |
FALSE |
3 |
mat: 10865, mat: 10362, mat: 5343 |
| failure |
0 |
1 |
FALSE |
2 |
No: 20921, Yes: 5649 |
Variable type: numeric
| id |
0 |
1.00 |
13284.50 |
7670.24 |
0.00 |
6642.25 |
13284.50 |
19926.75 |
26569.00 |
▇▇▇▇▇ |
| loading |
250 |
0.99 |
127.83 |
39.03 |
33.16 |
99.99 |
122.39 |
149.15 |
385.86 |
▃▇▂▁▁ |
| attribute_2 |
0 |
1.00 |
6.75 |
1.47 |
5.00 |
6.00 |
6.00 |
8.00 |
9.00 |
▅▇▁▅▃ |
| attribute_3 |
0 |
1.00 |
7.24 |
1.46 |
5.00 |
6.00 |
8.00 |
8.00 |
9.00 |
▃▃▁▇▃ |
| measurement_0 |
0 |
1.00 |
7.42 |
4.12 |
0.00 |
4.00 |
7.00 |
10.00 |
29.00 |
▆▇▂▁▁ |
| measurement_1 |
0 |
1.00 |
8.23 |
4.20 |
0.00 |
5.00 |
8.00 |
11.00 |
29.00 |
▅▇▃▁▁ |
| measurement_2 |
0 |
1.00 |
6.26 |
3.31 |
0.00 |
4.00 |
6.00 |
8.00 |
24.00 |
▅▇▂▁▁ |
| measurement_3 |
381 |
0.99 |
17.79 |
1.00 |
13.97 |
17.12 |
17.79 |
18.47 |
21.50 |
▁▃▇▃▁ |
| measurement_4 |
538 |
0.98 |
11.73 |
1.00 |
8.01 |
11.05 |
11.73 |
12.41 |
16.48 |
▁▅▇▁▁ |
| measurement_5 |
676 |
0.97 |
17.13 |
1.00 |
12.07 |
16.44 |
17.13 |
17.80 |
21.42 |
▁▁▇▃▁ |
| measurement_6 |
796 |
0.97 |
17.51 |
1.00 |
12.71 |
16.84 |
17.52 |
18.18 |
21.54 |
▁▂▇▅▁ |
| measurement_7 |
937 |
0.96 |
11.72 |
1.00 |
7.97 |
11.04 |
11.71 |
12.39 |
15.42 |
▁▃▇▃▁ |
| measurement_8 |
1048 |
0.96 |
19.02 |
1.01 |
15.22 |
18.34 |
19.02 |
19.71 |
23.81 |
▁▅▇▂▁ |
| measurement_9 |
1227 |
0.95 |
11.43 |
1.00 |
7.54 |
10.76 |
11.43 |
12.10 |
15.41 |
▁▃▇▃▁ |
| measurement_10 |
1300 |
0.95 |
16.12 |
1.41 |
9.32 |
15.21 |
16.13 |
17.02 |
22.48 |
▁▂▇▂▁ |
| measurement_11 |
1468 |
0.94 |
19.17 |
1.52 |
12.46 |
18.17 |
19.21 |
20.21 |
25.64 |
▁▂▇▃▁ |
| measurement_12 |
1601 |
0.94 |
11.70 |
1.49 |
5.17 |
10.70 |
11.72 |
12.71 |
17.66 |
▁▂▇▃▁ |
| measurement_13 |
1774 |
0.93 |
15.65 |
1.16 |
10.89 |
14.89 |
15.63 |
16.37 |
22.71 |
▁▇▇▁▁ |
| measurement_14 |
1874 |
0.93 |
16.05 |
1.49 |
9.14 |
15.06 |
16.04 |
17.08 |
22.30 |
▁▂▇▃▁ |
| measurement_15 |
2009 |
0.92 |
15.00 |
1.55 |
9.10 |
13.96 |
14.97 |
16.02 |
21.63 |
▁▃▇▂▁ |
| measurement_16 |
2110 |
0.92 |
16.46 |
1.71 |
9.70 |
15.27 |
16.44 |
17.63 |
24.09 |
▁▃▇▂▁ |
| measurement_17 |
2284 |
0.91 |
701.27 |
123.30 |
196.79 |
618.96 |
701.02 |
784.09 |
1312.79 |
▁▅▇▁▁ |
# Put the response first, followed by every predictor; the row identifier
# `id` is the only column left out. dplyr::select() is namespace-qualified
# because MASS::select masks it once MASS is attached.
productfailure <- productfailure %>%
  dplyr::select(
    failure,
    product_code,
    loading,
    num_range("attribute_", 0:3),
    num_range("measurement_", 0:17)
  )
# Fix the RNG seed so the stochastic imputation below is reproducible.
set.seed(1234)
# Multiply impute the incomplete columns with predictive mean matching,
# generating five imputed data sets; per-iteration console output is off.
imp_prod <- mice(
  data = productfailure,
  m = 5,
  method = "pmm",
  printFlag = FALSE
)
## Warning: Number of logged events: 768
# Show the imputation setup: the method used per column, the predictor
# matrix, and the events mice logged while fitting.
summary(imp_prod)
## Class: mids
## Number of multiple imputations: 5
## Imputation methods:
## failure product_code loading attribute_0 attribute_1
## "" "" "pmm" "" ""
## attribute_2 attribute_3 measurement_0 measurement_1 measurement_2
## "" "" "" "" ""
## measurement_3 measurement_4 measurement_5 measurement_6 measurement_7
## "pmm" "pmm" "pmm" "pmm" "pmm"
## measurement_8 measurement_9 measurement_10 measurement_11 measurement_12
## "pmm" "pmm" "pmm" "pmm" "pmm"
## measurement_13 measurement_14 measurement_15 measurement_16 measurement_17
## "pmm" "pmm" "pmm" "pmm" "pmm"
## PredictorMatrix:
## failure product_code loading attribute_0 attribute_1 attribute_2
## failure 0 1 1 1 1 1
## product_code 1 0 1 1 1 1
## loading 1 1 0 1 1 1
## attribute_0 1 1 1 0 1 1
## attribute_1 1 1 1 1 0 1
## attribute_2 1 1 1 1 1 0
## attribute_3 measurement_0 measurement_1 measurement_2
## failure 1 1 1 1
## product_code 1 1 1 1
## loading 1 1 1 1
## attribute_0 1 1 1 1
## attribute_1 1 1 1 1
## attribute_2 1 1 1 1
## measurement_3 measurement_4 measurement_5 measurement_6
## failure 1 1 1 1
## product_code 1 1 1 1
## loading 1 1 1 1
## attribute_0 1 1 1 1
## attribute_1 1 1 1 1
## attribute_2 1 1 1 1
## measurement_7 measurement_8 measurement_9 measurement_10
## failure 1 1 1 1
## product_code 1 1 1 1
## loading 1 1 1 1
## attribute_0 1 1 1 1
## attribute_1 1 1 1 1
## attribute_2 1 1 1 1
## measurement_11 measurement_12 measurement_13 measurement_14
## failure 1 1 1 1
## product_code 1 1 1 1
## loading 1 1 1 1
## attribute_0 1 1 1 1
## attribute_1 1 1 1 1
## attribute_2 1 1 1 1
## measurement_15 measurement_16 measurement_17
## failure 1 1 1
## product_code 1 1 1
## loading 1 1 1
## attribute_0 1 1 1
## attribute_1 1 1 1
## attribute_2 1 1 1
## Number of logged events: 768
## it im dep meth
## 1 1 1 loading pmm
## 2 1 1 loading pmm
## 3 1 1 measurement_3 pmm
## 4 1 1 measurement_3 pmm
## 5 1 1 measurement_4 pmm
## 6 1 1 measurement_4 pmm
## out
## 1 product_codeC, attribute_0material_5, attribute_1material_6, attribute_3, measurement_1
## 2 mice detected that your data are (nearly) multi-collinear.\nIt applied a ridge penalty to continue calculations, but the results can be unstable.\nDoes your dataset contain duplicates, linear transformation, or factors with unique respondent names?
## 3 product_codeB, product_codeE, attribute_1material_5, measurement_0, measurement_2
## 4 mice detected that your data are (nearly) multi-collinear.\nIt applied a ridge penalty to continue calculations, but the results can be unstable.\nDoes your dataset contain duplicates, linear transformation, or factors with unique respondent names?
## 5 product_codeB, product_codeC, attribute_0material_5, attribute_1material_5, measurement_0
## 6 mice detected that your data are (nearly) multi-collinear.\nIt applied a ridge penalty to continue calculations, but the results can be unstable.\nDoes your dataset contain duplicates, linear transformation, or factors with unique respondent names?
# Density plot of measurement_4 from the imputation object, used as a visual
# check of the imputations (presumably observed vs. imputed values overlaid;
# see mice's densityplot method for the exact layout).
densityplot(imp_prod, ~measurement_4)

# Extract one completed data set from the imputation object. `action = 1L` is
# the default of mice's complete(), i.e. the first of the five imputations.
product_failure_complete <- mice::complete(imp_prod, action = 1L)
# Summary statistics of the completed data.
summary(product_failure_complete)
## failure product_code loading attribute_0
## No :20921 A:5100 Min. : 33.16 material_7:21320
## Yes: 5649 B:5250 1st Qu.:100.02 material_5: 5250
## C:5765 Median :122.46
## D:5112 Mean :128.01
## E:5343 3rd Qu.:149.32
## Max. :385.86
## attribute_1 attribute_2 attribute_3 measurement_0
## material_8:10865 Min. :5.000 Min. :5.00 Min. : 0.000
## material_5:10362 1st Qu.:6.000 1st Qu.:6.00 1st Qu.: 4.000
## material_6: 5343 Median :6.000 Median :8.00 Median : 7.000
## Mean :6.754 Mean :7.24 Mean : 7.416
## 3rd Qu.:8.000 3rd Qu.:8.00 3rd Qu.:10.000
## Max. :9.000 Max. :9.00 Max. :29.000
## measurement_1 measurement_2 measurement_3 measurement_4
## Min. : 0.000 Min. : 0.000 Min. :13.97 Min. : 8.008
## 1st Qu.: 5.000 1st Qu.: 4.000 1st Qu.:17.11 1st Qu.:11.054
## Median : 8.000 Median : 6.000 Median :17.78 Median :11.735
## Mean : 8.233 Mean : 6.257 Mean :17.79 Mean :11.734
## 3rd Qu.:11.000 3rd Qu.: 8.000 3rd Qu.:18.46 3rd Qu.:12.411
## Max. :29.000 Max. :24.000 Max. :21.50 Max. :16.484
## measurement_5 measurement_6 measurement_7 measurement_8
## Min. :12.07 Min. :12.71 Min. : 7.968 Min. :15.22
## 1st Qu.:16.39 1st Qu.:16.86 1st Qu.:10.978 1st Qu.:18.27
## Median :17.10 Median :17.52 Median :11.686 Median :18.97
## Mean :17.09 Mean :17.54 Mean :11.643 Mean :18.96
## 3rd Qu.:17.79 3rd Qu.:18.19 3rd Qu.:12.364 3rd Qu.:19.68
## Max. :21.43 Max. :21.54 Max. :15.419 Max. :23.81
## measurement_9 measurement_10 measurement_11 measurement_12
## Min. : 7.537 Min. : 9.323 Min. :12.46 Min. : 5.167
## 1st Qu.:10.738 1st Qu.:15.219 1st Qu.:18.12 1st Qu.:10.534
## Median :11.445 Median :16.182 Median :19.23 Median :11.595
## Mean :11.416 Mean :16.125 Mean :19.16 Mean :11.532
## 3rd Qu.:12.094 3rd Qu.:17.058 3rd Qu.:20.24 3rd Qu.:12.643
## Max. :15.412 Max. :22.479 Max. :25.64 Max. :17.663
## measurement_13 measurement_14 measurement_15 measurement_16
## Min. :10.89 Min. : 9.14 Min. : 9.104 Min. : 9.701
## 1st Qu.:14.95 1st Qu.:15.07 1st Qu.:14.037 1st Qu.:15.199
## Median :15.72 Median :16.13 Median :15.057 Median :16.323
## Mean :15.78 Mean :16.09 Mean :15.042 Mean :16.311
## 3rd Qu.:16.54 3rd Qu.:17.13 3rd Qu.:16.014 3rd Qu.:17.514
## Max. :22.71 Max. :22.30 Max. :21.626 Max. :24.094
## measurement_17
## Min. : 196.8
## 1st Qu.: 628.3
## Median : 715.1
## Mean : 734.1
## 3rd Qu.: 813.4
## Max. :1312.8
# Inspect the structure of the completed (imputed) data set.
str(product_failure_complete)
## 'data.frame': 26570 obs. of 25 variables:
## $ failure : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 2 2 1 1 ...
## $ product_code : Factor w/ 5 levels "A","B","C","D",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ loading : num 80.1 84.9 82.4 101.1 188.1 ...
## $ attribute_0 : Factor w/ 2 levels "material_7","material_5": 1 1 1 1 1 1 1 1 1 1 ...
## $ attribute_1 : Factor w/ 3 levels "material_8","material_5",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ attribute_2 : num 9 9 9 9 9 9 9 9 9 9 ...
## $ attribute_3 : num 5 5 5 5 5 5 5 5 5 5 ...
## $ measurement_0 : num 7 14 12 13 9 11 12 4 9 10 ...
## $ measurement_1 : num 8 3 1 2 2 4 2 8 6 4 ...
## $ measurement_2 : num 4 3 5 6 8 0 4 8 5 7 ...
## $ measurement_3 : num 18 18.2 18.1 17.3 19.3 ...
## $ measurement_4 : num 12.5 11.5 11.7 11.2 12.9 ...
## $ measurement_5 : num 15.7 17.7 16.7 18.6 17 ...
## $ measurement_6 : num 19.3 17.9 18.2 18.3 15.7 ...
## $ measurement_7 : num 11.7 12.7 12.7 12.6 11.3 ...
## $ measurement_8 : num 20.2 17.9 18.3 19.1 18.1 ...
## $ measurement_9 : num 10.7 12.4 12.7 12.5 10.3 ...
## $ measurement_10: num 15.9 17.9 15.6 16.3 17.1 ...
## $ measurement_11: num 17.6 17.9 16.2 18.4 19.9 ...
## $ measurement_12: num 15.2 11.8 13.8 10 12.4 ...
## $ measurement_13: num 15 14.7 16.7 15.2 16.2 ...
## $ measurement_14: num 14.7 15.4 18.6 15.6 12.8 ...
## $ measurement_15: num 13 14.4 14.1 16.2 13.2 ...
## $ measurement_16: num 14.7 15.6 17.9 17.2 16.4 ...
## $ measurement_17: num 764 682 663 826 580 ...
# Skim the completed data; the missing-count column shows whether any NAs
# remain after imputation.
skim(product_failure_complete)
Data summary
| Name |
product_failure_complete |
| Number of rows |
26570 |
| Number of columns |
25 |
| _______________________ |
|
| Column type frequency: |
|
| factor |
4 |
| numeric |
21 |
| ________________________ |
|
| Group variables |
None |
Variable type: factor
| failure |
0 |
1 |
FALSE |
2 |
No: 20921, Yes: 5649 |
| product_code |
0 |
1 |
FALSE |
5 |
C: 5765, E: 5343, B: 5250, D: 5112 |
| attribute_0 |
0 |
1 |
FALSE |
2 |
mat: 21320, mat: 5250 |
| attribute_1 |
0 |
1 |
FALSE |
3 |
mat: 10865, mat: 10362, mat: 5343 |
Variable type: numeric
| loading |
0 |
1 |
128.01 |
39.20 |
33.16 |
100.02 |
122.46 |
149.32 |
385.86 |
▃▇▂▁▁ |
| attribute_2 |
0 |
1 |
6.75 |
1.47 |
5.00 |
6.00 |
6.00 |
8.00 |
9.00 |
▅▇▁▅▃ |
| attribute_3 |
0 |
1 |
7.24 |
1.46 |
5.00 |
6.00 |
8.00 |
8.00 |
9.00 |
▃▃▁▇▃ |
| measurement_0 |
0 |
1 |
7.42 |
4.12 |
0.00 |
4.00 |
7.00 |
10.00 |
29.00 |
▆▇▂▁▁ |
| measurement_1 |
0 |
1 |
8.23 |
4.20 |
0.00 |
5.00 |
8.00 |
11.00 |
29.00 |
▅▇▃▁▁ |
| measurement_2 |
0 |
1 |
6.26 |
3.31 |
0.00 |
4.00 |
6.00 |
8.00 |
24.00 |
▅▇▂▁▁ |
| measurement_3 |
0 |
1 |
17.79 |
1.00 |
13.97 |
17.11 |
17.78 |
18.46 |
21.50 |
▁▃▇▃▁ |
| measurement_4 |
0 |
1 |
11.73 |
1.00 |
8.01 |
11.05 |
11.73 |
12.41 |
16.48 |
▁▅▇▁▁ |
| measurement_5 |
0 |
1 |
17.09 |
1.01 |
12.07 |
16.39 |
17.10 |
17.78 |
21.42 |
▁▂▇▃▁ |
| measurement_6 |
0 |
1 |
17.54 |
1.02 |
12.71 |
16.86 |
17.52 |
18.19 |
21.54 |
▁▁▇▅▁ |
| measurement_7 |
0 |
1 |
11.64 |
1.08 |
7.97 |
10.98 |
11.69 |
12.36 |
15.42 |
▁▃▇▃▁ |
| measurement_8 |
0 |
1 |
18.96 |
1.05 |
15.22 |
18.27 |
18.97 |
19.68 |
23.81 |
▁▆▇▂▁ |
| measurement_9 |
0 |
1 |
11.42 |
1.00 |
7.54 |
10.74 |
11.45 |
12.09 |
15.41 |
▁▃▇▃▁ |
| measurement_10 |
0 |
1 |
16.13 |
1.40 |
9.32 |
15.22 |
16.18 |
17.06 |
22.48 |
▁▂▇▂▁ |
| measurement_11 |
0 |
1 |
19.16 |
1.58 |
12.46 |
18.12 |
19.23 |
20.24 |
25.64 |
▁▂▇▃▁ |
| measurement_12 |
0 |
1 |
11.53 |
1.65 |
5.17 |
10.53 |
11.60 |
12.64 |
17.66 |
▁▂▇▃▁ |
| measurement_13 |
0 |
1 |
15.78 |
1.23 |
10.89 |
14.95 |
15.72 |
16.54 |
22.71 |
▁▇▇▁▁ |
| measurement_14 |
0 |
1 |
16.09 |
1.47 |
9.14 |
15.07 |
16.13 |
17.13 |
22.30 |
▁▂▇▃▁ |
| measurement_15 |
0 |
1 |
15.04 |
1.51 |
9.10 |
14.04 |
15.06 |
16.01 |
21.63 |
▁▃▇▂▁ |
| measurement_16 |
0 |
1 |
16.31 |
1.79 |
9.70 |
15.20 |
16.32 |
17.51 |
24.09 |
▁▃▇▂▁ |
| measurement_17 |
0 |
1 |
734.15 |
163.43 |
196.79 |
628.26 |
715.09 |
813.43 |
1312.79 |
▁▅▇▂▁ |
# Split the predictors into four smaller groups, each paired with the
# response `failure`, so every pairs plot stays readable.

# Group 1: loading plus the four attribute columns.
productfailure1 <- product_failure_complete %>%
  dplyr::select(failure, loading, num_range("attribute_", 0:3))

# Group 2: measurement_0 through measurement_5.
productfailure2 <- product_failure_complete %>%
  dplyr::select(failure, num_range("measurement_", 0:5))

# Group 3: measurement_6 through measurement_11.
productfailure3 <- product_failure_complete %>%
  dplyr::select(failure, num_range("measurement_", 6:11))

# Group 4: measurement_12 through measurement_17.
productfailure4 <- product_failure_complete %>%
  dplyr::select(failure, num_range("measurement_", 12:17))
# Summary statistics for the first subset (failure, loading, attributes).
summary(productfailure1)
## failure loading attribute_0 attribute_1
## No :20921 Min. : 33.16 material_7:21320 material_8:10865
## Yes: 5649 1st Qu.:100.02 material_5: 5250 material_5:10362
## Median :122.46 material_6: 5343
## Mean :128.01
## 3rd Qu.:149.32
## Max. :385.86
## attribute_2 attribute_3
## Min. :5.000 Min. :5.00
## 1st Qu.:6.000 1st Qu.:6.00
## Median :6.000 Median :8.00
## Mean :6.754 Mean :7.24
## 3rd Qu.:8.000 3rd Qu.:8.00
## Max. :9.000 Max. :9.00
# Pairwise plot matrix across every column of the first subset.
ggpairs(data = productfailure1)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Column-wise summary of the second subset (failure, measurement_0-5).
productfailure2 %>% summary()
## failure measurement_0 measurement_1 measurement_2 measurement_3
## No :20921 Min. : 0.000 Min. : 0.000 Min. : 0.000 Min. :13.97
## Yes: 5649 1st Qu.: 4.000 1st Qu.: 5.000 1st Qu.: 4.000 1st Qu.:17.11
## Median : 7.000 Median : 8.000 Median : 6.000 Median :17.78
## Mean : 7.416 Mean : 8.233 Mean : 6.257 Mean :17.79
## 3rd Qu.:10.000 3rd Qu.:11.000 3rd Qu.: 8.000 3rd Qu.:18.46
## Max. :29.000 Max. :29.000 Max. :24.000 Max. :21.50
## measurement_4 measurement_5
## Min. : 8.008 Min. :12.07
## 1st Qu.:11.054 1st Qu.:16.39
## Median :11.735 Median :17.10
## Mean :11.734 Mean :17.09
## 3rd Qu.:12.411 3rd Qu.:17.79
## Max. :16.484 Max. :21.43
# Pairwise plot matrix across every column of the second subset.
ggpairs(data = productfailure2)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Column-wise summary of the third subset (failure, measurement_6-11).
productfailure3 %>% summary()
## failure measurement_6 measurement_7 measurement_8 measurement_9
## No :20921 Min. :12.71 Min. : 7.968 Min. :15.22 Min. : 7.537
## Yes: 5649 1st Qu.:16.86 1st Qu.:10.978 1st Qu.:18.27 1st Qu.:10.738
## Median :17.52 Median :11.686 Median :18.97 Median :11.445
## Mean :17.54 Mean :11.643 Mean :18.96 Mean :11.416
## 3rd Qu.:18.19 3rd Qu.:12.364 3rd Qu.:19.68 3rd Qu.:12.094
## Max. :21.54 Max. :15.419 Max. :23.81 Max. :15.412
## measurement_10 measurement_11
## Min. : 9.323 Min. :12.46
## 1st Qu.:15.219 1st Qu.:18.12
## Median :16.182 Median :19.23
## Mean :16.125 Mean :19.16
## 3rd Qu.:17.058 3rd Qu.:20.24
## Max. :22.479 Max. :25.64
# Pairwise plot matrix across every column of the third subset.
ggpairs(data = productfailure3)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Column-wise summary of the fourth subset (failure, measurement_12-17).
productfailure4 %>% summary()
## failure measurement_12 measurement_13 measurement_14 measurement_15
## No :20921 Min. : 5.167 Min. :10.89 Min. : 9.14 Min. : 9.104
## Yes: 5649 1st Qu.:10.534 1st Qu.:14.95 1st Qu.:15.07 1st Qu.:14.037
## Median :11.595 Median :15.72 Median :16.13 Median :15.057
## Mean :11.532 Mean :15.78 Mean :16.09 Mean :15.042
## 3rd Qu.:12.643 3rd Qu.:16.54 3rd Qu.:17.13 3rd Qu.:16.014
## Max. :17.663 Max. :22.71 Max. :22.30 Max. :21.626
## measurement_16 measurement_17
## Min. : 9.701 Min. : 196.8
## 1st Qu.:15.199 1st Qu.: 628.3
## Median :16.323 Median : 715.1
## Mean :16.311 Mean : 734.1
## 3rd Qu.:17.514 3rd Qu.: 813.4
## Max. :24.094 Max. :1312.8
# Pairwise plot matrix across every column of the fourth subset.
ggpairs(data = productfailure4)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Correlation heat map for the first subset, coefficients shown to two
# decimals. `label` is a logical switch, so pass TRUE rather than the string
# "true", which only works through implicit character-to-logical coercion.
# Non-numeric columns are skipped by ggcorr() with a warning.
ggcorr(productfailure1, label = TRUE, label_round = 2)
## Warning in ggcorr(productfailure1, label = "true", label_round = 2): data in
## column(s) 'failure', 'attribute_0', 'attribute_1' are not numeric and were
## ignored

# Correlation heat map for the second subset, coefficients shown to two
# decimals. `label` is a logical switch, so pass TRUE rather than the string
# "true", which only works through implicit character-to-logical coercion.
# Non-numeric columns are skipped by ggcorr() with a warning.
ggcorr(productfailure2, label = TRUE, label_round = 2)
## Warning in ggcorr(productfailure2, label = "true", label_round = 2): data in
## column(s) 'failure' are not numeric and were ignored

# Correlation heat map for the third subset, coefficients shown to two
# decimals. `label` is a logical switch, so pass TRUE rather than the string
# "true", which only works through implicit character-to-logical coercion.
# Non-numeric columns are skipped by ggcorr() with a warning.
ggcorr(productfailure3, label = TRUE, label_round = 2)
## Warning in ggcorr(productfailure3, label = "true", label_round = 2): data in
## column(s) 'failure' are not numeric and were ignored

# Correlation heat map for the fourth subset, coefficients shown to two
# decimals. `label` is a logical switch, so pass TRUE rather than the string
# "true", which only works through implicit character-to-logical coercion.
# Non-numeric columns are skipped by ggcorr() with a warning.
ggcorr(productfailure4, label = TRUE, label_round = 2)
## Warning in ggcorr(productfailure4, label = "true", label_round = 2): data in
## column(s) 'failure' are not numeric and were ignored

# Correlation heat map over the whole data set, coefficients shown to two
# decimals. `label` is a logical switch, so pass TRUE rather than the string
# "true", which only works through implicit character-to-logical coercion.
# Non-numeric columns are skipped by ggcorr() with a warning.
ggcorr(product_failure_complete, label = TRUE, label_round = 2)
## Warning in ggcorr(product_failure_complete, label = "true", label_round = 2):
## data in column(s) 'failure', 'product_code', 'attribute_0', 'attribute_1' are
## not numeric and were ignored

# Share of each failure class across the range of `loading`. `loading` is
# continuous, so geom_bar() would draw one sliver per distinct value; bin it
# with a histogram instead and fill every bin to 100%.
ggplot(
  data = product_failure_complete,
  mapping = aes(x = loading, fill = failure)
) +
  geom_histogram(position = "fill", bins = 30) +
  theme_bw()

# Failure proportions within each product_code (margin = 2: every column
# sums to 1). The same crosstab is computed again after the product_code
# bar chart below.
t1 <- table(
  product_failure_complete$failure,
  product_failure_complete$product_code
)
prop.table(t1, margin = 2)
##
## A B C D E
## No 0.7727451 0.7996190 0.7883781 0.7824726 0.7930002
## Yes 0.2272549 0.2003810 0.2116219 0.2175274 0.2069998
# 100%-stacked bars: share of each failure class per product_code.
ggplot(
  data = product_failure_complete,
  mapping = aes(x = product_code, fill = failure)
) +
  geom_bar(position = "fill") +
  theme_bw()

# Same split as numbers: failure proportions within each product_code
# (margin = 2, so every column sums to 1).
t1 <- table(
  product_failure_complete[["failure"]],
  product_failure_complete[["product_code"]]
)
t1 %>% prop.table(margin = 2)
##
## A B C D E
## No 0.7727451 0.7996190 0.7883781 0.7824726 0.7930002
## Yes 0.2272549 0.2003810 0.2116219 0.2175274 0.2069998
# 100%-stacked bars: share of each failure class per attribute_2 value.
ggplot(
  data = product_failure_complete,
  mapping = aes(x = attribute_2, fill = failure)
) +
  geom_bar(position = "fill") +
  theme_bw()

# Same split as numbers: failure proportions within each attribute_2 value
# (margin = 2, so every column sums to 1).
t1 <- table(
  product_failure_complete[["failure"]],
  product_failure_complete[["attribute_2"]]
)
t1 %>% prop.table(margin = 2)
##
## 5 6 8 9
## No 0.7883781 0.7878527 0.7996190 0.7727451
## Yes 0.2116219 0.2121473 0.2003810 0.2272549
# 100%-stacked bars: share of each failure class per attribute_0 level.
ggplot(
  data = product_failure_complete,
  mapping = aes(x = attribute_0, fill = failure)
) +
  geom_bar(position = "fill") +
  theme_bw()

# Same split as numbers: failure proportions within each attribute_0 level
# (margin = 2, so every column sums to 1).
t1 <- table(
  product_failure_complete[["failure"]],
  product_failure_complete[["attribute_0"]]
)
t1 %>% prop.table(margin = 2)
##
## material_7 material_5
## No 0.7843809 0.7996190
## Yes 0.2156191 0.2003810
# 100%-stacked bars: share of each failure class per attribute_1 level.
ggplot(
  data = product_failure_complete,
  mapping = aes(x = attribute_1, fill = failure)
) +
  geom_bar(position = "fill") +
  theme_bw()

# Same split as numbers: failure proportions within each attribute_1 level
# (margin = 2, so every column sums to 1).
t1 <- table(
  product_failure_complete[["failure"]],
  product_failure_complete[["attribute_1"]]
)
t1 %>% prop.table(margin = 2)
##
## material_8 material_5 material_6
## No 0.7810400 0.7911600 0.7930002
## Yes 0.2189600 0.2088400 0.2069998
# 100%-stacked bars: share of each failure class per attribute_3 value.
ggplot(
  data = product_failure_complete,
  mapping = aes(x = attribute_3, fill = failure)
) +
  geom_bar(position = "fill") +
  theme_bw()

# Same split as numbers: failure proportions within each attribute_3 value
# (margin = 2, so every column sums to 1).
t1 <- table(
  product_failure_complete[["failure"]],
  product_failure_complete[["attribute_3"]]
)
t1 %>% prop.table(margin = 2)
##
## 5 6 8 9
## No 0.7727451 0.7824726 0.7937358 0.7930002
## Yes 0.2272549 0.2175274 0.2062642 0.2069998
# 100%-stacked bars: share of each failure class per measurement_0 value.
ggplot(
  data = product_failure_complete,
  mapping = aes(x = measurement_0, fill = failure)
) +
  geom_bar(position = "fill") +
  theme_bw()

# Same split as numbers: failure proportions within each measurement_0 value
# (margin = 2, so every column sums to 1).
t1 <- table(
  product_failure_complete[["failure"]],
  product_failure_complete[["measurement_0"]]
)
t1 %>% prop.table(margin = 2)
##
## 0 1 2 3 4 5 6
## No 0.8159851 0.7834793 0.7800797 0.8018610 0.7864035 0.7876209 0.7819602
## Yes 0.1840149 0.2165207 0.2199203 0.1981390 0.2135965 0.2123791 0.2180398
##
## 7 8 9 10 11 12 13
## No 0.7844796 0.7928368 0.8037699 0.7884058 0.7682495 0.7877095 0.7890724
## Yes 0.2155204 0.2071632 0.1962301 0.2115942 0.2317505 0.2122905 0.2109276
##
## 14 15 16 17 18 19 20
## No 0.7839506 0.8004386 0.7520891 0.7423581 0.7839196 0.6991150 0.7848101
## Yes 0.2160494 0.1995614 0.2479109 0.2576419 0.2160804 0.3008850 0.2151899
##
## 21 22 23 24 25 26 27
## No 0.8113208 0.8684211 0.7647059 0.7083333 0.5714286 1.0000000 1.0000000
## Yes 0.1886792 0.1315789 0.2352941 0.2916667 0.4285714 0.0000000 0.0000000
##
## 29
## No 1.0000000
## Yes 0.0000000
# 100%-stacked bars: share of each failure class per measurement_1 value.
ggplot(
  data = product_failure_complete,
  mapping = aes(x = measurement_1, fill = failure)
) +
  geom_bar(position = "fill") +
  theme_bw()

# Same split as numbers: failure proportions within each measurement_1 value
# (margin = 2, so every column sums to 1).
t1 <- table(
  product_failure_complete[["failure"]],
  product_failure_complete[["measurement_1"]]
)
t1 %>% prop.table(margin = 2)
##
## 0 1 2 3 4 5 6
## No 0.7671958 0.7411945 0.8031496 0.7930781 0.7766821 0.7864955 0.7827368
## Yes 0.2328042 0.2588055 0.1968504 0.2069219 0.2233179 0.2135045 0.2172632
##
## 7 8 9 10 11 12 13
## No 0.7825235 0.7893724 0.7899873 0.7793202 0.7973046 0.8000000 0.8057296
## Yes 0.2174765 0.2106276 0.2100127 0.2206798 0.2026954 0.2000000 0.1942704
##
## 14 15 16 17 18 19 20
## No 0.7892157 0.7927786 0.8004246 0.7634069 0.8018868 0.7810651 0.7659574
## Yes 0.2107843 0.2072214 0.1995754 0.2365931 0.1981132 0.2189349 0.2340426
##
## 21 22 23 24 25 26 27
## No 0.7692308 0.8750000 0.8235294 0.7333333 0.8750000 1.0000000 0.5000000
## Yes 0.2307692 0.1250000 0.1764706 0.2666667 0.1250000 0.0000000 0.5000000
##
## 28 29
## No 1.0000000 0.0000000
## Yes 0.0000000 1.0000000
# 100%-stacked bars: share of each failure class per measurement_2 value.
ggplot(
  data = product_failure_complete,
  mapping = aes(x = measurement_2, fill = failure)
) +
  geom_bar(position = "fill") +
  theme_bw()

# Same split as numbers: failure proportions within each measurement_2 value
# (margin = 2, so every column sums to 1).
t1 <- table(
  product_failure_complete[["failure"]],
  product_failure_complete[["measurement_2"]]
)
t1 %>% prop.table(margin = 2)
##
## 0 1 2 3 4 5 6
## No 0.7799043 0.7973856 0.8044597 0.7808511 0.7871846 0.7960506 0.7984152
## Yes 0.2200957 0.2026144 0.1955403 0.2191489 0.2128154 0.2039494 0.2015848
##
## 7 8 9 10 11 12 13
## No 0.7752420 0.7793944 0.7856000 0.7972696 0.8030457 0.7717087 0.7522124
## Yes 0.2247580 0.2206056 0.2144000 0.2027304 0.1969543 0.2282913 0.2477876
##
## 14 15 16 17 18 19 20
## No 0.7713178 0.7414966 0.7472527 0.7014925 0.7567568 0.6842105 0.6250000
## Yes 0.2286822 0.2585034 0.2527473 0.2985075 0.2432432 0.3157895 0.3750000
##
## 21 22 23 24
## No 0.6250000 0.0000000 1.0000000 0.5000000
## Yes 0.3750000 1.0000000 0.0000000 0.5000000
# measurement_3 .. measurement_17 are continuous, so geom_bar() would draw
# one sliver per distinct value. Bin each variable with a histogram instead
# and fill every bin to 100% to show the share of each failure class.
# One plot is produced per measurement, in column-number order.
for (measurement in paste0("measurement_", 3:17)) {
  failure_share_plot <- ggplot(
    data = product_failure_complete,
    # `.data[[...]]` looks the column up by its name held in `measurement`.
    mapping = aes(x = .data[[measurement]], fill = failure)
  ) +
    geom_histogram(position = "fill", bins = 30) +
    # Without this the axis title would read ".data[[measurement]]".
    labs(x = measurement) +
    theme_bw()

  # Plots are not auto-printed inside a loop, so print explicitly.
  print(failure_share_plot)
}
